


		****************************************************************
		*                                                              *
		* Programs for implementing the Machado and Mata Decomposition *
		*                                                              *   
		****************************************************************



*** I. Program for applying the Machado - Mata Decomposition (original or standard) using QR estimates  ***
*** from fixed percentiles and producing Bootstrapped Std Errors                                        ***


* To use this program:
* 1) Load the dataset which contains data (appended) on both samples that will be used in the decomposition
* 2) Generate a 0-1 dummy indicator which distinguishes the two samples and should be used in the by( ) requirement. 
* The generated Counterfactual combines estimated coefficients from the sample that this indicator is 0 and 
* characteristics from the sample that this indicator is 1 (the TOTAL number of obs are re-drawn from sample 1 
* to match the vector of QR coeff estimated at each percentile)
* 3) In every application, special attention should be given to what each pair of differences represents (covariate or 
* coefficient effects)
* 4) if the ORIGinal option is specified the program produces the estimated (instead of the actual) difference, 
* marginal cov. and coeff. effects as well as the estimated error.  
* 5) The generated counterfactual along with its weights (when they are required) are generated in the end of the program
* and should be used as inputs to the program mmpctile which is at the bottom of this file.
* 6) If the "se(#)" option is specified, the program produces bootstrapped standard errors for each percentile.
* To accelerate the process it uses datasets with bootstrapped QR bs that have been created and saved by the do-file "collect QR_bs".
* The path of the folder which contains the sub-folder ("0") in which these datasets have been saved MUST be specified in
* the global foldpse. If the "se(#)" option is specified together with ORIGinal, then bootstrapped QR bs are also
* drawn from the sub-folder ("1") (note that as long as bootstrapped QR bs from sample 1 have been saved in the sub-folder "1",
* the global area must be set to 1). All the values that result from the above process and that will be used to estimate the
* standard errors are saved in the file "countse.dta" in that sub-folder.
* The number specified in se(#) MUST be the number of datasets in the sub-folder ("0") (and ("1") if the option ORIGinal
* has been requested)!
* 7) OPTION P can take only the values 20, 50, 100, 200, 1000 defining the # of percentiles at which the counterfactual
* values will be estimated (the default is 20). IF the SE option is simultaneously specified, then P MUST be set
* to the SAME number of percentiles that have been stored in the datasets with bootstrapped QR bs !


* Syntax:
* mmdeco depvar indepvars, [P(#) Weight(varname) SE(#) ORIGinal] by(varname)

* Example of Syntax (MM decomposition of y between two samples distinguished by the variable ind;
* producing counterfactuals evaluated at 19 percentiles (every 5p) with bootstrapped std errors evaluated at the same percentiles
* (the number of pre-saved datasets with bootstrapped QR bs is 500); weights to be taken into account)
* mmdeco y x1 x2 x3, p(20) se(500) w(wgtach) by(ind)

*~*~*~*~*~*~*~*


**********************************************************************

cap program drop mmdeco
program define mmdeco

    * mmdeco depvar indepvars, by(varname) [P(#) Weight(varname) SE(#) ORIGinal]
    *
    * Builds a counterfactual distribution of depvar from quantile-regression
    * coefficients estimated on the by==0 sample and covariates resampled from
    * the by==1 sample, at percentiles 1 to 99, and optionally repeats the whole
    * procedure SE(#) times on bootstrap samples to obtain standard errors.
    *
    * Side effects visible in this program:
    *   - the data in memory are replaced
    *   - temp_orig, temp_cf (and with SE: tempA_se, tempB_se, temp_se, tempse_cf)
    *     are written to the current working directory
    *   - counterf_orig (and with SE: counterf_se1 ... counterf_se#) are written
    *     to the folder named by the global foldpse
    *   - matrices bN#, xR#, counter# and cwgt are created or overwritten
    *
    * NOTE(review): options P() and ORIGinal are accepted by the parser but are
    * not referenced anywhere in this program body; the percentile grid is fixed
    * at 1/99. Confirm whether that is intended.

    version 9.2

    * Define the path where the datasets with bootstrapped QR bs have been saved
    * (foldp.do presumably sets the global foldp; the global asset must already
    * be set by the caller - TODO confirm)
    do "c:/foldp/foldp.do"
    global foldpse "${foldp}/Wealth Counterfactuals/Verification/Data/DG/${asset}/countse/"
    *global foldpse "c:/work/Integration/Data/temp/${asset}/countse/"

    set more off

    * SYNTAX
    syntax  varlist(default=none) [, P(integer 20) Weight(varname) SE(string) ORIGinal] by(varname)

    * Distinguish LHS, RHS variables.
    * The split is done on the varlist returned by syntax (already expanded and
    * free of the option comma) instead of on the raw positional arguments, which
    * would carry a trailing comma or an unexpanded abbreviation into rhs.
    gettoken depvar rhs : varlist
    if "`depvar'" == "" {
        di in red "varlist required"
        exit 100
    }

    * Weight() is optional: when it is omitted use unit weights so that the
    * mkmat and pctile calls below still receive a weight variable.
    if "`weight'" == "" {
        tempvar unitw
        quietly gen byte `unitw' = 1
        local weight `unitw'
    }

    qui save temp_orig, replace


    * LOOP at each percentile
    forvalues q = 1/99 {

        use temp_orig, clear

        * QR regression in the sub-sample with `by'==0
        capture qreg `depvar' `rhs' if `by'==0, quantile(`q')
        local rc=_rc
        if `rc'!=0 {
            noisily di in red "convergence not achieved in" in yellow " prct:`q'" in green " for reg 0"
        }

        * Collect QR_0 estimates (column vector)
        capture matrix bN`q' = (e(b))'

        * KEEP the sub-sample with `by'==1
        quietly keep if `by'==1
        * Draw from ALL observations (with `by'==1); seed depends on the percentile
        set seed 12345`q'6
        bsample
        * Create a vector of ones
        gen constant=1

        * Collect obs in one matrix: # of obs. x numb. of regressors (plus constant)
        mkmat `rhs' constant, matrix(xR`q')
        * Collect corresponding weights in a matrix-column
        mkmat `weight', matrix(cwgt)

        * Create a matrix-column with the counterfactual values for percentile `q'
        capture matrix counter`q'=xR`q'* bN`q'

        * SAVE matrix elements as data
        quietly drop _all
        capture svmat counter`q'
        capture rename counter`q'1 counter
        * Blank the counterfactual when the regression failed. This is now done
        * for every percentile, including the first one, so that stale matrices
        * left over from an earlier run are never used.
        capture replace counter=. if `rc'~=0
        svmat cwgt
        rename cwgt1 cwgt

        quietly gen qid=`q'
        qui gen nid = _n
        * From the second percentile on, stack onto what has been saved so far
        if `q'>1 {
            append using temp_cf
        }
        quietly save temp_cf,replace
        display in green "." _continue

    } /* percentile loop closing bracket */


    *** Save/Arrange the Data - to be used by program II. ***

    quietly {
        use temp_orig, clear

        * A / awgt: outcome and weight of the by==1 sample
        ge double A = `depvar' if `by'==1
        ge double awgt = `weight' if `by'==1

        * B / bwgt: outcome and weight of the by==0 sample
        ge double B = `depvar' if `by'==0
        ge double bwgt = `weight' if `by'==0

        save temp_orig, replace

        use temp_cf, clear

        * Observation-by-observation merge (old merge syntax, no key)
        merge using temp_orig, keep(A awgt B bwgt)
        drop _merge

        pctile pa = A [w=awgt], n(100)
        pctile pb = B [w=bwgt], n(100)
        ge diff = pb - pa

        pctile pcf = counter [w=cwgt], n(100)
        ge p = _n in 1/99
        ge cvef = pb - pcf
        ge ref = pcf - pa

        sort p
        save "${foldpse}counterf_orig", replace

    } /* quietly closing bracket */


    *** SE ***

    if "`se'" != "" {

        forvalues j = 1/`se'  {

            noisily di "`j'"
            forvalues q = 1/99 {

                * Bootstrap each sample separately, then stack them
                qui {
                    use temp_orig, clear
                    keep if `by'==1
                    drop A awgt B bwgt
                    set seed 34`q'5`j'6
                    bsample
                    save tempA_se, replace

                    use temp_orig, clear
                    keep if `by'==0
                    drop B bwgt A awgt
                    set seed 56`q'7`j'8
                    bsample
                    save tempB_se, replace

                    append using tempA_se
                }
                qui save temp_se, replace

                * QR regression in the sub-sample with `by'==0
                capture qreg `depvar' `rhs' if `by'==0, quantile(`q')
                local rc=_rc
                if `rc'!=0 {
                    noisily di in red "convergence not achieved in" in yellow " prct:`q'" in green " for reg 0"
                }

                * Collect QR_0 estimates (column vector)
                capture matrix bN`q' = (e(b))'

                * KEEP the sub-sample with `by'==1
                quietly keep if `by'==1
                * Draw from ALL observations (with `by'==1)
                set seed 67`q'8`j'9
                bsample
                * Create a vector of ones
                gen constant=1

                * Collect obs in one matrix: # of obs. x numb. of regressors (plus constant)
                mkmat `rhs' constant, matrix(xR`q')
                * Collect corresponding weights in a matrix-column
                mkmat `weight', matrix(cwgt)

                * Create a matrix-column with the counterfactual values for percentile `q'
                capture matrix counter`q'=xR`q'* bN`q'

                * SAVE matrix elements as data
                quietly drop _all
                capture svmat counter`q'
                capture rename counter`q'1 counter
                * Blank the counterfactual when the regression failed, for every
                * percentile including the first (bN matrices from the main loop
                * would otherwise be reused silently).
                capture replace counter=. if `rc'~=0
                svmat cwgt
                rename cwgt1 cwgt

                quietly gen qid=`q'
                qui gen nid = _n
                * From the second percentile on, stack onto what has been saved so far
                if `q'>1 {
                    append using tempse_cf
                }
                quietly save tempse_cf,replace
                display in green "." _continue

            } /* percentile loop closing bracket */


            *** Save/Arrange the Data - to be used by program II. ***

            quietly {
                * temp_se holds the bootstrap sample saved at the last percentile
                use temp_se, clear

                ge double A = `depvar' if `by'==1
                ge double awgt = `weight' if `by'==1

                ge double B = `depvar' if `by'==0
                ge double bwgt = `weight' if `by'==0

                save temp_se, replace

                use tempse_cf, clear

                merge using temp_se, keep(A awgt B bwgt)
                drop _merge

                pctile pa = A [w=awgt], n(100)
                pctile pb = B [w=bwgt], n(100)
                ge diff = pb - pa

                pctile pcf = counter [w=cwgt], n(100)
                ge p=_n

                ge cvef_`j' = pb - pcf
                ge ref_`j' = pcf - pa

                keep if p<=99
                keep p  ref_`j' cvef_`j'
                sort p

                save "${foldpse}counterf_se`j'", replace

            } /* quietly closing bracket */
        }  /* loop `j' closing bracket */

        quietly {

            * Attach every replication to the point estimates, keyed on p
            forvalues j = 1/`se'  {
                use "${foldpse}counterf_orig", clear
                sort p
                merge p using "${foldpse}counterf_se`j'"
                drop _merge
                sort p
                save "${foldpse}counterf_orig", replace
            }

            * Row-wise standard deviation across replications and 1.96*SE bounds.
            * These variables are left in the data in memory; this program does
            * not save them to disk.
            egen SE_ref = rowsd(ref_*)
            egen SE_cvef = rowsd(cvef_*)

            ge u_ref = ref + 1.96*SE_ref
            ge l_ref = ref - 1.96*SE_ref

            ge u_cvef = cvef + 1.96*SE_cvef
            ge l_cvef = cvef - 1.96*SE_cvef

        } /* quietly */

    } /* option "se" closing bracket */

end

exit



* mmdeco ${depvar} ${indepvars}, p(20) w(wgtach) se(100) by(ind)






